*==============================================================================*
*                      Assessment of Tier 2 Exhaust Standards:                 *
*                      Do New Predict Used Vehicle Emissions?                  *
*==============================================================================*
* this table uses colorado IM240 data


u "dataSTATA/combined/combined_smogcheck_colorado.dta", clear
merge m:1 vin10 using "dataSTATA/concordances/vin_linkedto_newcars.dta", keep(3) nogen
replace vin10 = substr(vin,1,8)+substr(vin,10,2)


* non-missing new vehicle emissions tests for all pollutants including CO2 
* OJO: this drops a lot of the sample, why? is it dropping model years 1982-3?
keep if emissions_new_CO < . & emissions_new_HC < . & emissions_new_NOX < . & emissions_new_CO2 < .

* non-missing used vehicle emissions tests for all pollutants including CO2
keep if emissions_used_CO < . & emissions_used_HC < . & emissions_used_NOX < . & emissions_used_CO2 < .

* 2.2% of sample has a zero reading for some pollutant.
egen rowmin = rowmin(emissions_used_* emissions_new_*)
drop if rowmin <= 0
drop rowmin


foreach v of varlist emissions_used_* emissions_new_* {
	g ln`v' = ln(`v')
}

g age5 = age >= 4 & age <= 6 

replace odometer = odometer / 100000
g odometer_2 = odometer ^ 2

g ldtXmodel_year = ldt * model_year

destring *std, replace
ren v_co_std  standard_smogcheck_CO
ren v_hc_std  standard_smogcheck_HC
ren v_nox_std standard_smogcheck_NOX
g lnstandard_smogcheck_CO  = ln(standard_smogcheck_CO)
g lnstandard_smogcheck_HC  = ln(standard_smogcheck_HC)
g lnstandard_smogcheck_NOX = ln(standard_smogcheck_NOX)

* so the regression doesn't generate error. but don't need real values because
* there are no smogcheck co2 standards.
g standard_smogcheck_CO2   = 1
g lnstandard_smogcheck_CO2 = 1

g lncafe_standard = ln(cafe_standard)
g lngasCostPerMile = ln(gasCostPerMile)
* replacing missing gas costs (i.e., missing MPG since not in vin decoder?)
* with mean gas cost by model year*ldt, so it doesn't drop 2.5% of obs
egen meanGasCost = mean(gasCostPerMile), by(model_year ldt)
replace lngasCostPerMile = ln(meanGasCost) if lngasCostPerMile >= .
g lnso2_per_gallon = ln(so2_per_gallon)

g lnsh_ethanol     = ln(sh_ethanol)

loc col = 1

mat M = J(5,60,.)

foreach p in CO HC NOX CO2 {
	xi: reg        lnemissions_used_`p'   lnemissions_new_`p' if fulltest == 1,                                        cluster(vin10)
		est store m1_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'] = r(mean)
	
	xi: reghdfe    lnemissions_used_`p'   lnemissions_new_`p' ldt odometer odometer_2 if fulltest == 1,                cluster(vin10) absorb(model_year age)
		est store m2_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+1] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+1] = r(mean)

	xi: reghdfe    lnemissions_used_`p'   lnemissions_new_`p' ldt odometer odometer_2 lncafe_standard lnstandard_smogcheck_`p' lngasCostPerMile lnsh_ethanol lnso2_per_gallon if fulltest == 1,                cluster(vin10) absorb(model_year age)
		est store m3_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+2] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+2] = r(mean)

	xi: reghdfe    lnemissions_used_`p'   lnemissions_new_`p' ldt odometer odometer_2 ldtXmodel_year if fulltest == 1, cluster(vin10) absorb(model_year age)
		est store m4_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+3] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+3] = r(mean)

	xi: reghdfe    lnemissions_used_`p'   lnemissions_new_`p' ldt odometer odometer_2 if age5 == 1 & fulltest == 1,   cluster(vin10) absorb(model_year age)
		est store m5_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+4] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+4] = r(mean)
	
	xi: reghdfe  emissions_used_`p' emissions_new_`p' ldt odometer odometer_2 if fulltest == 1,                cluster(vin10) absorb(model_year age)
		est store m6_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+5] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+5] = r(mean)
		
	xi: reghdfe    lnemissions_used_`p'   lnemissions_new_`p' ldt odometer odometer_2,                cluster(vin10) absorb(model_year age)
		est store m7_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+6] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+6] = r(mean)

	xi: reghdfe    emissions_used_`p'   emissions_new_`p' ldt odometer odometer_2,                cluster(vin10) absorb(model_year age)
		est store m8_`p'
		qui su emissions_used_`p' if e(sample)
		mat M[1,`col'+7] = r(mean)
		qui su emissions_new_`p' if e(sample)
		mat M[2,`col'+7] = r(mean)

	loc col = `col' + 8
	
}


svmat M

preserve
	keep M1-M60
	keep in 1/2
	outsheet using "results/tables/2_t4_tier2_xmean.txt", replace 
restore

g n = _n
reshape long emissions_used_ emissions_new_ lnemissions_used_ lnemissions_new_ lnstandard_smogcheck_ standard_smogcheck_, i(n) j(pollutant) string

keep if inlist(pollutant, "CO", "HC", "NOX")


egen model_yearXpollutant = group(model_year pollutant)
egen ageXpollutant = group(age pollutant)

xi: reghdfe    lnemissions_used_ lnemissions_new_ if fulltest == 1,                                        cluster(vin10) absorb(pollutant)
	est store m1_all

xi: reghdfe    lnemissions_used_ lnemissions_new_ ldt odometer odometer_2 if fulltest == 1,                cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m2_all

xi: reghdfe    lnemissions_used_ lnemissions_new_ ldt odometer odometer_2 lncafe_standard lnstandard_smogcheck_ lngasCostPerMile lnsh_ethanol lnso2_per_gallon if fulltest == 1,                cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m3_all

xi: reghdfe    lnemissions_used_   lnemissions_new_ ldt odometer odometer_2 ldtXmodel_year if fulltest == 1, cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m4_all

xi: reghdfe    lnemissions_used_   lnemissions_new_ ldt odometer odometer_2 if age5 == 1 & fulltest == 1,   cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m5_all

xi: reghdfe  emissions_used_ emissions_new_ ldt odometer odometer_2 if fulltest == 1,                cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m6_all
	
xi: reghdfe    lnemissions_used_   lnemissions_new_ ldt odometer odometer_2,                cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m7_all

xi: reghdfe    emissions_used_   emissions_new_ ldt odometer odometer_2,                cluster(vin10) absorb(model_yearXpollutant ageXpollutant)
	est store m8_all

estout m*all m*CO m*HC m*NOX m*CO2 ///
using "results/tables/2_t4_tier2.txt", ///
	cells( b(star fmt(%9.2f)) se(par(`"="("'`")""'))) ///
	title("Do new vehicle tests predict used vehicle emissions?") style(tab) keep(*new*) ///
	stats(N N_clust, fmt(0 0 3) labels(N "Clusters")) label collabels(, none) ///
	starlevels(* .10 ** .05 *** .01) replace

	
	